#! /usr/bin/env python

# For general OS related functions
import os
# For html parsing
import re
# For cli arguments
import argparse


# Gets content from a given filename
def file_get_contents(filename):
	"""Return the full text content of *filename*, or None if the file does not exist.

	Callers must handle the None case (file missing).
	"""
	if not os.path.exists(filename):
		return None
	# Context manager guarantees the handle is closed even if read() raises.
	with open(filename, "r") as fp:
		return fp.read()


# Gets link data list from a given file
def get_link_data_from_file(filename):
	"""Parse a Netscape-style bookmark HTML file and return a list of links.

	Each element is a (url, title) tuple extracted from <A HREF="..."> tags.
	Returns an empty list if the file is missing or unreadable.
	"""
	contents = file_get_contents(filename)
	if contents is None:
		# file_get_contents returns None for a missing file; the original
		# code passed that straight to re.findall and crashed with TypeError.
		return []
	# group 0 -> url
	# group 1 -> page title
	return re.findall(r'HREF="(https?://[^\s]+)".*>(.*)</A', contents)


# Generate output from a given linkdata list
def output_from_link_data(linkdata, filename):
	htmlbegin="""
<!DOCTYPE NETSCAPE-Bookmark-file-1>
<!-- This is an automatically generated file.
	 It will be read and overwritten.
	 DO NOT EDIT! -->
<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=UTF-8">
<TITLE>Bookmarks</TITLE>
<H1>Bookmarks Menu</H1>

<DL><p>
	<DT><H3 UNFILED_BOOKMARKS_FOLDER="true">Merged Bookmarks</H3>
	<DL><p>
"""
	htmlend="""
</DL><p>"""
	html=htmlbegin

	for data in linkdata:
		html = html+'<DT><A HREF="'+data[0]+'">'+data[1]+'</A>\n\n'

	html = html+htmlend

	if filename != '':
		html_file = open(filename, "w")
		html_file.write(html)
		print('Output written to: '+filename)
		html_file.close()
	else:
		print(html)


# Removed duplicates from a list
def remove_duplicates(l):
	"""Return a copy of *l* with duplicates dropped, keeping first-seen order.

	Uses linear membership tests so elements need not be hashable.
	"""
	unique = []
	for item in l:
		if item in unique:
			continue
		unique.append(item)
	return unique


## ---- MAIN PROGRAM ---- ##

parser = argparse.ArgumentParser(prog='bookmerger', description='Merge multiple browser HTML bookmark files into one.')
parser.add_argument('files', metavar='files', type=str, nargs='+',
					help='HTML bookmark files')
parser.add_argument('--output', metavar='output', type=str, default='',
					help='Merged output HTML bookmark file')
args = parser.parse_args()


# Collect (url, title) pairs from every input file, skipping files
# that do not exist on disk.
linkdata = list()
for filename in args.files:
	if os.path.exists(filename):
		links = get_link_data_from_file(filename)
		for elem in links:
			if elem not in linkdata:
				linkdata.append(elem)
	else:
		print('ERROR: '+filename+' is not found, so skipped')

linkdata = remove_duplicates(linkdata)

# --output defaults to '' (never None), and output_from_link_data prints to
# stdout for an empty filename, so one call covers both cases. The previous
# `if args.output is None` branch was unreachable dead code and would have
# crashed anyway (it called output_from_link_data with a missing argument).
output_from_link_data(linkdata, args.output)

